*WIC co-op eWIC new diff in diff analysis
*last modified: 21 December 2024
*last modified by: Kathya Tapia-Schythe
*this do file implements the csdid2 package to estimate individual 2x2 doubly robust did estimates and aggregate to obtain an att 

*-------------------------------------------------------------------------------
*--- Preamble
*-------------------------------------------------------------------------------

clear all
set more off
set rmsg on

set maxvar 120000
set emptycells drop

*ssc install csdid, replace
*ssc install drdid, replace
*net install csdid2, from("https://friosavila.github.io/stpackages")

*-------------------------------------------------------------------------------
*--- Directories and Log 
*-------------------------------------------------------------------------------

local data_dir ./data/cleaned
local raw_dir ./data/raw
local out_dir ./analysis/output
local graph_dir ./analysis/output/graphs
local tab_dir ./analysis/output/tables
local log_dir ./documentation/logs
local date: display %tdYY-NN-DD date(c(current_date), "DMY")
di "`date'"
capture log close

log using `log_dir'/ewic_csdid_lzip`date', replace

*-------------------------------------------------------------------------------
*--- Data and Frames 
*-------------------------------------------------------------------------------

*import cleaned eWIC implementation info into own frame
*eWIC implementation variable is at county fips/fiscal year level 
*partition data into state/treatment year groups

*import cleaned ewic implementation info into own frame
mkf ewic_fy
frame ewic_fy{
	use ./data/cleaned/ewic_rollout.dta, clear
	rename year fiscalyear
	drop pre* post* 
}

mkf income
frame income{
	use ./data/cleaned/zipyearmedianincome.dta, clear
	keep if year == 2011
	drop year
	destring zip, replace
}

cwf ewic_fy

*import two separate cleaned redemptions datasets (second is balanced to only include ZIPs that have non-missing redemptions in all years)
mkf red_fy
frame red_fy{
	use ./data/cleaned/tip_wic_redemptions_05_18, clear
	rename (wic_redemptions) (wicred)
}

mkf red_bal_fy
*load balanced redemptions as an attempted fix
frame red_bal_fy{
	use ./data/cleaned/tip_wic_bal_redemptions_05_18, clear
	rename (wic_redemptions) (wicred)
}

*import TIP store data into own frame
mkf tip_fy
frame tip_fy{
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear
	*drop "Direct Distribution Center" and "Home Food Delivery Contractor" types
	gen f = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) | (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bys tip_id: egen flag = mode(f)
	bys tip_id: replace vendor_type1 = vendor_type1[_n+1] if flag == 0 & f == 1
	drop if flag == 1
	drop flag f tip_year_id
	*fill in gaps in state fips using modes
	*get st_fips
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace 
	bys st_fips: egen state_mode = mode(state)
	replace state = state_mode if state != state_mode & !missing(state_mode)
	drop str_fips state_mode

	*drop Vermont, Mississippi, + missing st_fips
	drop if st_fips == 50 | st_fips == 32 | st_fips == 28 | missing(st_fips)
	
	*link in low income/high poverty info
	frlink m:1 zip, frame(income) generate(inc_link)
	frget lowinc highpov, from(inc_link)

	*link ewic implementation info
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year , from(ewic_link)
	* assume treated at earliest exposure
	bys tip_id: egen ey_min = min(ev_year)

	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	*assert no dups
	duplicates report tip_id fiscalyear 
	assert r(N) == r(unique_value)
	
	*this zip has multiple implementation dates and can't be reconciled
	drop if zip == 42223
	
	*put fields necessary to create a frame of unique zip/year obs with ewic implementation; later will merge this to tip_fy again and then collapse to get aggregated zip level outcomes
	compress
	frame put zip fiscalyear ev_year state st_fips lowinc highpov, into(zip_sq_frame)
	*put all obs into frame to use for zip collapsing
	frame put _all, into(zip_sq)
}

frame zip_sq_frame{
	duplicates drop 
	
	bys zip: assert state[1] == state[_N]
	bys zip: assert st_fips[1] == st_fips[_N]
	*resolve discrepancies in values of ewic implementation variables as follows:
	*ev_year is equal to the min of ev_year across the zip/fiscal year (use the earliest possible event year)
	bys zip: egen ey_min = min(ev_year)
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys zip: assert ev_year[1] == ev_year[_N]

	duplicates drop

	duplicates report zip fiscalyear 
	assert r(N) == r(unique_value)	
	compress
}

*collapse to zip/fiscalyear level and then link ewic policy variables back in
frame zip_sq{
	*reshape by chain status to allow looking at chain status as heterogeneity later; may also want to do this for a50 or vendor type at some point
	collapse (sum) auth (first) state st_fips lowinc highpov, by(zip fiscalyear chain)
	gen cstring = ""
	replace cstring = "chain" if chain == 1
	replace cstring = "indep" if chain == 0
	drop chain
	reshape wide auth, i(zip fiscalyear lowinc highpov) j(cstring) string
	assert !missing(zip) & !missing(fiscalyear)
	egen auth = rowtotal(auth*)
	recode authindep authchain (missing = 0)
	*start linking in other policy variables and outcomes (redemptions)
	frlink 1:1 zip fiscalyear, frame(zip_sq_frame) generate(ewic_link)
	frlink 1:1 zip fiscalyear, frame(red_fy) generate(red_link)
	frlink 1:1 zip fiscalyear, frame(red_bal_fy) generate(red_bal_link)
	frget ev_year , from(ewic_link)
	frget wicred, from(red_link)
	frget wicbalred = wicred, from(red_bal_link)
	compress
	assert !missing(ev_year) & !missing(fiscalyear) & !missing(zip)
	xtset zip fiscalyear
	compress
	*log 
	foreach var of varlist auth authindep authchain {
		format `var' %11.6f
		local r = abs(rnormal(0, 0.000001))
		tempvar zero
		gen `zero' = `var' == 0
		replace `var' = `r' if `zero' == 1
		gen double l`var' = log(`var')
		*check that log conversion using errors worked
		assert !missing(l`var') if !missing(`var')
		replace `var' = 0 if `zero' == 1
 	}
	foreach var of varlist wicred wicbalred{
		gen double l`var' = log(`var')
		bys zip: egen std`var' = std(`var')
		gen double `var'hk = `var'/100000
	}
	*standardize vars to see if this helps with left censoring
	foreach var of varlist auth authindep authchain{
		*use built in egen fn for this (mean zero, sd 1)
		bys zip: egen std`var' = std(`var'), mean(0) sd(1)
	}
	compress
}

mkf csdidsimple float(b se pval ll ul) str25(outcome subsample)
mkf csdidevent float(b se pval ll ul) int(ev_yr) str25(outcome subsample)
mkf csdideventavg float(avgb avgse avgpval avgll avgul) int(times) str25(time outcome subsample)
mkf csdidgroup float(b se pval ll ul) str25(gr_yr outcome subsample)

*-------------------------------------------------------------------------------
*--- Estimates: Unbalanced Sample
*-------------------------------------------------------------------------------

*a.- All
cwf zip_sq
cd `out_dir'
csdid lwicred, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_lwicred) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("lwicred") ("all")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("lwicred") ("all")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("lwicred") ("all")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("lwicred") ("all")
}

estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("lwicred") ("all")
}

*b.- Low Income
cwf zip_sq
frame put _all if lowinc == 1, into(lowinc)
cwf lowinc

cd `out_dir'
csdid lwicred, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_lwicredlowinc) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("lwicred") ("lowinc")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("lwicred") ("lowinc")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("lwicred") ("lowinc")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("lwicred") ("lowinc")
}
estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("lwicred") ("lowinc")
}

*c.- High Poverty
cwf zip_sq
frame put _all if highpov == 1, into(highpov)
cwf highpov
frame drop lowinc

cd `out_dir'
csdid lwicred, ivar(zip) time(fiscalyear) gvar(ev_year) notyet saverif(csdid_zip_lwicredhighpov) replace cluster(st_fips)
estat simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("lwicred") ("highpov")
estat event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("lwicred") ("highpov")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("lwicred") ("highpov")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("lwicred") ("highpov")
}
estat group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("lwicred") ("highpov")
}

*-------------------------------------------------------------------------------
*--- Export Results
*-------------------------------------------------------------------------------

frame csdideventavg{
	expand times, generate(new)
	bys time outcome subsample (new): gen ev_yr = sum(new)
	replace ev_yr = ev_yr - 4 if time == "pre"
	drop time new times
}

cwf csdidevent
frlink 1:1 ev_yr outcome subsample, frame(csdideventavg)
frget avgb avgse avgpval avgll avgul, from(csdideventavg)
drop csdideventavg

frame drop csdideventavg

cwf csdidsimple
frame drop highpov
save `out_dir'/csdid_lzip_simple.dta, replace
cwf csdidevent
save `out_dir'/csdid_lzip_event.dta, replace  
cwf csdidgroup
save `out_dir'/csdid_lzip_group.dta, replace  


log close 
